{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "9a9299ff",
   "metadata": {},
   "source": [
    "### Calculate FPKM and FBNC over expected readthrough region for candidate DELs and DUPs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "16c3f2e7",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import pysam \n",
    "from pathlib import Path \n",
    "import numpy as np\n",
    "from scipy.stats import spearmanr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "5ee1ec8b",
   "metadata": {},
   "outputs": [],
   "source": [
    "wkdir = \"/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3\"\n",
    "wkdir_path = Path(wkdir)\n",
    "\n",
    "# inputs \n",
    "misexp_vrnt_feat_path = wkdir_path.joinpath(\"6_misexp_dissect/vrnt_features/misexp_vrnt_features.tsv\")\n",
    "covariates_path = \"/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/lof_missense/phenotypes/rna_seq/processed_v97/covariates/master/master_covariates_v97_swapd_depth_fastq_rin_cell_sex_pcs_season_batch_fc_pipelines_updtd.tsv\"\n",
    "rna_id_pass_qc_path = wkdir_path.joinpath(\"1_rna_seq_qc/aberrant_smpl_qc/smpls_pass_qc_4568.csv\")\n",
    "wgs_rna_paired_smpls_path = wkdir_path.joinpath(\"1_rna_seq_qc/wgs_rna_match/paired_wgs_rna_postqc_prioritise_wgs.tsv\")\n",
    "vcf_path=\"/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/lof_missense/data/sv_vcf/filtered_merged_gs_svp_10728.vcf.gz\"\n",
    "tpm_mtx_path = wkdir_path.joinpath(\"1_rna_seq_qc/tpm_mtx/tpm_4568samples_59144genes_smpl_qc.csv\")\n",
    "# deletion and duplication readthrough candidates \n",
    "del_tx_read_vrnt_feat_path = wkdir_path.joinpath(\"6_misexp_dissect/tx_readthrough/deletions/misexp_del_tx_readthrough_candidates.tsv\")\n",
    "dup_tx_read_vrnt_feat_path = wkdir_path.joinpath(\"6_misexp_dissect/tx_readthrough/duplications/misexp_dup_tx_readthrough_candidates.tsv\")\n",
    "# output\n",
    "out_dir = wkdir_path.joinpath(\"6_misexp_dissect/tx_readthrough/combine\")\n",
    "out_dir.mkdir(parents=True, exist_ok=True)  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "ba1ee1fe",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of DEL transcriptional readthrough candidates: 12\n"
     ]
    }
   ],
   "source": [
    "### Deletions \n",
    "# readthrough candidate deletions \n",
    "del_tx_read_vrnt_feat_df = pd.read_csv(del_tx_read_vrnt_feat_path, sep=\"\\t\")\n",
    "del_tx_read = del_tx_read_vrnt_feat_df.vrnt_id.unique()\n",
    "print(f\"Number of DEL transcriptional readthrough candidates: {len(del_tx_read)}\")\n",
    "\n",
    "# load readthrough region read count and coverage across all variants \n",
    "del_readthrough_region_dir = wkdir_path.joinpath(\"6_misexp_dissect/tx_readthrough/deletions/readthrough_region\")\n",
    "all_del_readthrough_region_df_list = []\n",
    "for vrnt_id in del_tx_read: \n",
    "    carriers = del_tx_read_vrnt_feat_df[del_tx_read_vrnt_feat_df.vrnt_id == vrnt_id].rna_id.unique()\n",
    "    vrnt_readthrough_region_path = del_readthrough_region_dir.joinpath(f\"{vrnt_id}/{vrnt_id}_intergenic_cov.tsv\")\n",
    "    vrnt_readthrough_region_df = pd.read_csv(vrnt_readthrough_region_path, sep=\"\\t\")\n",
    "    # annotate carriers and non-carriers \n",
    "    vrnt_readthrough_region_df[\"carrier\"] = np.where(vrnt_readthrough_region_df.rna_id.isin(carriers), 'DEL', \"Non-carrier\")\n",
    "    all_del_readthrough_region_df_list.append(vrnt_readthrough_region_df)\n",
    "    \n",
    "all_del_readthrough_region_df = pd.concat(all_del_readthrough_region_df_list)\n",
    "\n",
    "# load covariates \n",
    "covariates_df = pd.read_csv(covariates_path, sep=\"\\t\")\n",
    "rna_id_read_depth_df = covariates_df[[\"rna_id\", \"RawReadDepth\"]]\n",
    "# add read depth \n",
    "all_del_readthrough_region_df = pd.merge(all_del_readthrough_region_df, \n",
    "                                          rna_id_read_depth_df, \n",
    "                                          on=\"rna_id\", \n",
    "                                          how=\"left\"\n",
    "                                            )\n",
    "# compute FPKM \n",
    "all_del_readthrough_region_df[\"fpkm\"] = 1000000000 * (all_del_readthrough_region_df[\"features\"]/(all_del_readthrough_region_df[\"total_len\"] * all_del_readthrough_region_df[\"RawReadDepth\"]))\n",
    "\n",
    "# calculate FPKM and FBNC z-scores per region \n",
    "for feature in [\"fpkm\", \"cov_fraction\"]: \n",
    "    group_mean = all_del_readthrough_region_df.groupby(\"vrnt_gene_id\")[feature].transform(\"mean\")\n",
    "    group_std = all_del_readthrough_region_df.groupby(\"vrnt_gene_id\")[feature].transform(\"std\")\n",
    "    all_del_readthrough_region_df[f\"{feature}_zscore\"] = (all_del_readthrough_region_df[feature] - group_mean)/group_std\n",
    "all_del_readthrough_region_df[\"SVTYPE\"] = \"DEL\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "551a133f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of DUP transcriptional readthrough candidates: 5\n"
     ]
    }
   ],
   "source": [
    "### Duplications \n",
    "dup_tx_read_vrnt_feat_df = pd.read_csv(dup_tx_read_vrnt_feat_path, sep=\"\\t\")\n",
    "dup_tx_read = dup_tx_read_vrnt_feat_df.vrnt_id.unique()\n",
    "print(f\"Number of DUP transcriptional readthrough candidates: {len(dup_tx_read)}\")\n",
    "\n",
    "# load readthrough region read count and coverage across all variants \n",
    "dup_readthrough_region_dir = wkdir_path.joinpath(\"6_misexp_dissect/tx_readthrough/duplications/readthrough_region\")\n",
    "all_dup_readthrough_region_df_list = []\n",
    "for vrnt_id in dup_tx_read: \n",
    "    carriers = dup_tx_read_vrnt_feat_df[dup_tx_read_vrnt_feat_df.vrnt_id == vrnt_id].rna_id.unique()\n",
    "    vrnt_readthrough_region_path = dup_readthrough_region_dir.joinpath(f\"{vrnt_id}/{vrnt_id}_intergenic_cov.tsv\")\n",
    "    vrnt_readthrough_region_df = pd.read_csv(vrnt_readthrough_region_path, sep=\"\\t\")\n",
    "    # annotate carriers and non-carriers \n",
    "    vrnt_readthrough_region_df[\"carrier\"] = np.where(vrnt_readthrough_region_df.rna_id.isin(carriers), \"DUP\", \"Non-carrier\")\n",
    "    all_dup_readthrough_region_df_list.append(vrnt_readthrough_region_df)\n",
    "    \n",
    "all_dup_readthrough_region_df = pd.concat(all_dup_readthrough_region_df_list)\n",
    "# add read depth \n",
    "all_dup_readthrough_region_df = pd.merge(all_dup_readthrough_region_df, \n",
    "                                              rna_id_read_depth_df, \n",
    "                                              on=\"rna_id\", \n",
    "                                              how=\"left\"\n",
    "                                            )\n",
    "# compute FPKM \n",
    "all_dup_readthrough_region_df[\"fpkm\"] = 1000000000 * (all_dup_readthrough_region_df[\"features\"]/(all_dup_readthrough_region_df[\"total_len\"] * all_dup_readthrough_region_df[\"RawReadDepth\"]))\n",
    "\n",
    "# calculate FPKM and coverage z-scores per region \n",
    "for feature in [\"fpkm\", \"cov_fraction\"]: \n",
    "    group_mean = all_dup_readthrough_region_df.groupby(\"vrnt_gene_id\")[feature].transform(\"mean\")\n",
    "    group_std = all_dup_readthrough_region_df.groupby(\"vrnt_gene_id\")[feature].transform(\"std\")\n",
    "    all_dup_readthrough_region_df[f\"{feature}_zscore\"] = (all_dup_readthrough_region_df[feature] - group_mean)/group_std\n",
    "all_dup_readthrough_region_df[\"SVTYPE\"] = \"DUP\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "87d875c5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total number of transcriptional readthrough candidate variants: 17\n"
     ]
    }
   ],
   "source": [
    "# list of combined transcriptional readthrough-associated variants \n",
    "tx_read_vrnts = set(dup_tx_read).union(set(del_tx_read))\n",
    "print(f\"Total number of transcriptional readthrough candidate variants: {len(tx_read_vrnts)}\")\n",
    "tx_read_vrnts_path = out_dir.joinpath(\"tx_read_vrnts_list.txt\")\n",
    "with open(tx_read_vrnts_path, \"w\") as f_out: \n",
    "    for vrnt_id in tx_read_vrnts: \n",
    "        f_out.write(f\"{vrnt_id}\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "9b80f80f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# combine DELs and DUPs \n",
    "all_vrnt_readthrough_region_df = pd.concat([all_dup_readthrough_region_df, all_del_readthrough_region_df])\n",
    "# write variant-gene-sample to file for mechanism count\n",
    "tx_read_vrnt_gene_smpl_df = all_vrnt_readthrough_region_df[all_vrnt_readthrough_region_df.carrier != \"Non-carrier\"][[\"vrnt_id\", \"gene_id\", \"rna_id\"]]\n",
    "tx_read_vrnt_gene_smpl_path = out_dir.joinpath(\"tx_read_vrnts_gene_smpl.tsv\")\n",
    "tx_read_vrnt_gene_smpl_df.to_csv(tx_read_vrnt_gene_smpl_path, sep=\"\\t\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "ddae6942",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Pearson correlation FBNC and read depth: 0.027865948602230295\n",
      "Spearman correlation FBNC and read depth: 0.015193064166358943\n"
     ]
    }
   ],
   "source": [
    "# correlation between fraction bases non-zero coverage (FBNC) and read depth \n",
    "pearson_fbnc_depth = all_vrnt_readthrough_region_df.cov_fraction.corr(all_vrnt_readthrough_region_df.RawReadDepth)\n",
    "print(f\"Pearson correlation FBNC and read depth: {pearson_fbnc_depth}\")\n",
    "spearman_fbnc_depth = all_vrnt_readthrough_region_df.cov_fraction.corr(all_vrnt_readthrough_region_df.RawReadDepth, method=\"spearman\")\n",
    "print(f\"Spearman correlation FBNC and read depth: {spearman_fbnc_depth}\")\n",
    "# no correlation so do not correct for read depth "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "68d84feb",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading input VCF ...\n",
      "VCF loaded.\n",
      "Subset VCF to samples with RNA-seq ...\n",
      "Number of samples in VCF with RNA ID and passing QC: 2640\n",
      "Number of RNA IDs passing QC: 2640\n"
     ]
    }
   ],
   "source": [
    "### subset to WGS samples with SV calls\n",
    "rna_id_pass_qc_set = set(pd.read_csv(rna_id_pass_qc_path, sep=\"\\t\", header=None)[0])\n",
    "# egan ID, RNA ID sample links \n",
    "wgs_rna_paired_smpls_df = pd.read_csv(wgs_rna_paired_smpls_path, sep=\"\\t\")\n",
    "egan_ids_with_rna = wgs_rna_paired_smpls_df[wgs_rna_paired_smpls_df.rna_id.isin(rna_id_pass_qc_set)].egan_id.tolist()\n",
    "# load VCF and subset to EGAN IDs with RNA \n",
    "print(\"Loading input VCF ...\")\n",
    "vcf_path = vcf_path\n",
    "vcf = pysam.VariantFile(vcf_path, mode = \"r\")\n",
    "print(\"VCF loaded.\")\n",
    "print(\"Subset VCF to samples with RNA-seq ...\")\n",
    "vcf_samples = [sample for sample in vcf.header.samples]\n",
    "vcf_egan_ids_with_rna = set(egan_ids_with_rna).intersection(set(vcf_samples))\n",
    "vcf.subset_samples(vcf_egan_ids_with_rna)\n",
    "vcf_samples_with_rna = [sample for sample in vcf.header.samples]\n",
    "print(f\"Number of samples in VCF with RNA ID and passing QC: {len(vcf_samples_with_rna)}\")\n",
    "# subset egan ID and RNA ID links to samples with SV calls and passing QC \n",
    "wgs_rna_paired_smpls_with_sv_calls_df = wgs_rna_paired_smpls_df[wgs_rna_paired_smpls_df.egan_id.isin(vcf_samples_with_rna)]\n",
    "# write EGAN-RNA ID pairs to file\n",
    "rna_id_pass_qc_sv_calls = wgs_rna_paired_smpls_with_sv_calls_df.rna_id.unique().tolist()\n",
    "print(f\"Number of RNA IDs passing QC: {len(rna_id_pass_qc_sv_calls)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "6d4ab94d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Estimated number of variants: 17.0\n"
     ]
    }
   ],
   "source": [
    "# subset to WGS samples \n",
    "all_vrnt_readthrough_region_wgs_df = all_vrnt_readthrough_region_df[all_vrnt_readthrough_region_df.rna_id.isin(rna_id_pass_qc_sv_calls)]\n",
    "num_vrnts = all_vrnt_readthrough_region_wgs_df.shape[0]/len(rna_id_pass_qc_sv_calls)\n",
    "print(f\"Estimated number of variants: {num_vrnts}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "90cfa1a5",
   "metadata": {},
   "source": [
    "**Correlation between misexpression and FPKM/coverage z-score for carriers and non-carriers**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "274ea22b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# load gene expression levels \n",
    "misexp_tpm_zscore_path = wkdir_path.joinpath(\"1_rna_seq_qc/zscore_tpm_flat/tpm_zscore_4568smpls_8739genes_tpm0.1_frac_5.0perc_flat.csv\")\n",
    "misexp_tpm_zscore_df = pd.read_csv(misexp_tpm_zscore_path, sep=\",\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "10adf69a",
   "metadata": {},
   "outputs": [],
   "source": [
    "all_vrnt_readthrough_region_wgs_tpm_df = pd.merge(all_vrnt_readthrough_region_wgs_df, \n",
    "                                              misexp_tpm_zscore_df, \n",
    "                                              on=[\"rna_id\", \"gene_id\"], \n",
    "                                              how=\"inner\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "19d64562",
   "metadata": {},
   "outputs": [],
   "source": [
    "# carrier correlation \n",
    "carrier_vrnt_readthrough_region_wgs_tpm_df = all_vrnt_readthrough_region_wgs_tpm_df[all_vrnt_readthrough_region_wgs_tpm_df.carrier.isin([\"DEL\", \"DUP\"])]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "d254f078",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Correlation between coverage and misexpression level for carriers: 0.741784060204143, p-value: 5.096828360863981e-05\n",
      "Correlation between FPKM and misexpression level for carriers: 0.7190511709507181, p-value: 0.00011060602344143254\n"
     ]
    }
   ],
   "source": [
    "# carrier correlation \n",
    "carrier_vrnt_readthrough_region_wgs_tpm_df = all_vrnt_readthrough_region_wgs_tpm_df[all_vrnt_readthrough_region_wgs_tpm_df.carrier.isin([\"DEL\", \"DUP\"])]\n",
    "spearman_fpkm_carrier, pval_fpkm_carrier = spearmanr(carrier_vrnt_readthrough_region_wgs_tpm_df.fpkm_zscore.tolist(), carrier_vrnt_readthrough_region_wgs_tpm_df[\"z-score\"].tolist())\n",
    "spearman_cov_carrier, pval_cov_carrier = spearmanr(carrier_vrnt_readthrough_region_wgs_tpm_df.cov_fraction_zscore.tolist(), carrier_vrnt_readthrough_region_wgs_tpm_df[\"z-score\"].tolist())\n",
    "print(f\"Correlation between coverage and misexpression level for carriers: {spearman_cov_carrier}, p-value: {pval_cov_carrier}\")\n",
    "print(f\"Correlation between FPKM and misexpression level for carriers: {spearman_fpkm_carrier}, p-value: {pval_fpkm_carrier}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "60baf122",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Correlation between coverage and misexpression level for non-carriers: 0.05058856493752987, p-value: 8.115974380058901e-27\n",
      "Correlation between FPKM and misexpression level for non-carriers: 0.11708571679438892, p-value: 1.1347571294237164e-136\n"
     ]
    }
   ],
   "source": [
    "# non-carriers correlation \n",
    "noncarrier_vrnt_readthrough_region_wgs_tpm_df = all_vrnt_readthrough_region_wgs_tpm_df[~all_vrnt_readthrough_region_wgs_tpm_df.carrier.isin([\"DEL\", \"DUP\"])]\n",
    "spearman_cov_noncarrier, pval_cov_noncarrier = spearmanr(noncarrier_vrnt_readthrough_region_wgs_tpm_df.cov_fraction_zscore.tolist(), noncarrier_vrnt_readthrough_region_wgs_tpm_df[\"z-score\"].tolist())\n",
    "spearman_fpkm_noncarrier, pval_fpkm_noncarrier = spearmanr(noncarrier_vrnt_readthrough_region_wgs_tpm_df.fpkm_zscore.tolist(), noncarrier_vrnt_readthrough_region_wgs_tpm_df[\"z-score\"].tolist())\n",
    "print(f\"Correlation between coverage and misexpression level for non-carriers: {spearman_cov_noncarrier}, p-value: {pval_cov_noncarrier}\")\n",
    "print(f\"Correlation between FPKM and misexpression level for non-carriers: {spearman_fpkm_noncarrier}, p-value: {pval_fpkm_noncarrier}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c66a4c28",
   "metadata": {},
   "source": [
    "**Readthrough region length**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "6ccdcc50",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Median readthrough region length: 14905.0\n",
      "Max readthrough region length: 102943\n"
     ]
    }
   ],
   "source": [
    "vrnt_tx_read_len_df = all_vrnt_readthrough_region_wgs_tpm_df[[\"vrnt_id\", \"total_len\"]].drop_duplicates()\n",
    "print(f\"Median readthrough region length: {vrnt_tx_read_len_df.total_len.median()}\")\n",
    "print(f\"Max readthrough region length: {vrnt_tx_read_len_df.total_len.max()}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "60e49c29",
   "metadata": {},
   "source": [
    "**Consequences of readthrough associated variants**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "a060b8d9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# load misexpression-associated variant features \n",
    "misexp_vrnt_feat_df = pd.read_csv(misexp_vrnt_feat_path, sep=\"\\t\")\n",
    "\n",
    "# merge variant-gene pairs \n",
    "vrnt_gene_pairs_tx_read_df = all_vrnt_readthrough_region_wgs_tpm_df[[\"vrnt_id\", \"gene_id\"]].drop_duplicates()\n",
    "vrnt_gene_pairs_tx_read_df.vrnt_id = vrnt_gene_pairs_tx_read_df.vrnt_id.astype(str)\n",
    "\n",
    "vrnts_tx_read_feat_df = pd.merge(misexp_vrnt_feat_df, \n",
    "                                 vrnt_gene_pairs_tx_read_df, \n",
    "                                 on=[\"vrnt_id\", \"gene_id\"], \n",
    "                                 how=\"inner\")\n",
    "vrnts_tx_read_info_df = vrnts_tx_read_feat_df[[\"vrnt_id\", \"gene_id\", \"gene_msc\", \"SVTYPE\"]].drop_duplicates()\n",
    "vrnt_tx_read_count_consq_df = vrnts_tx_read_info_df.groupby([\"SVTYPE\", \"gene_msc\"], as_index=False).vrnt_id.count()\n",
    "vrnt_tx_read_count_consq_df = vrnt_tx_read_count_consq_df.rename(columns={\"vrnt_id\": \"vrnt_count\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "b37fdb2b",
   "metadata": {},
   "outputs": [],
   "source": [
    "consequence_names_dict = {\n",
    "    'no_predicted_effect': \"No predicted effect\",\n",
    "    'non_coding_transcript_exon_variant': \"Non-coding transcript\",\n",
    "    'upstream_gene_variant': \"Upstream (5 kb)\",\n",
    "    'transcript_amplification': \"Transcript amplification\"\n",
    "}\n",
    "vrnt_tx_read_count_consq_df[\"consq_name\"] = vrnt_tx_read_count_consq_df.gene_msc.replace(consequence_names_dict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "92b10517",
   "metadata": {},
   "outputs": [],
   "source": [
    "vrnt_tx_read_count_consq_path = out_dir.joinpath(\"del_dup_tx_readthrough_consq.tsv\")\n",
    "vrnt_tx_read_count_consq_df.to_csv(vrnt_tx_read_count_consq_path, sep=\"\\t\", index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "28d056a4",
   "metadata": {},
   "source": [
    "***Write results to file***"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "142df470",
   "metadata": {},
   "outputs": [],
   "source": [
    "all_vrnt_readthrough_region_wgs_tpm_path = out_dir.joinpath(\"del_dup_readthrough_region_wgs.tsv\")\n",
    "all_vrnt_readthrough_region_wgs_tpm_df.to_csv(all_vrnt_readthrough_region_wgs_tpm_path, sep=\"\\t\", index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
